package com.yuntsg.nnsfcp.test.pdf;

import lombok.SneakyThrows;
import org.apache.pdfbox.Loader;
import org.apache.pdfbox.pdmodel.PDDocument;
import org.apache.pdfbox.text.PDFTextStripper;

import java.io.File;

/**
 * @Author: 葛成瑞
 * @Date: 2024/9/23 13:14
 */
public class apachePdfPDDocumentoText {
    /**
     * PDF parsing test: loads a hard-coded PDF file with PDFBox and prints the
     * extracted text of each page to standard output, one page at a time.
     *
     * <p>Checked exceptions raised while loading or parsing the PDF are rethrown
     * unchanged via Lombok's {@code @SneakyThrows}.
     *
     * @param args command-line arguments (unused)
     */
    @SneakyThrows
    public static void main(String[] args) {
        // PDF parsing test
        String pdfPath = "D:/Users/55419/Desktop/杂乱/国自然原始文件.pdf";
        File file = new File(pdfPath);
        // try-with-resources guarantees the document (and its underlying file
        // handle / scratch buffers) is released even if extraction fails.
        try (PDDocument doc = Loader.loadPDF(file)) {
            int numberOfPages = doc.getNumberOfPages();
            PDFTextStripper stripper = new PDFTextStripper();
            // Table data extraction has serious problems; a different tool is needed.
            // PDFTextStripper page numbers are 1-based and inclusive, so iterate
            // from 1 to numberOfPages; starting at 0 would yield an empty first
            // result and silently skip the last page.
            for (int page = 1; page <= numberOfPages; page++) {
                stripper.setStartPage(page);
                stripper.setEndPage(page);
                String text = stripper.getText(doc);
                System.out.println(text);
            }
        }
    }

}
